package cn.db.crawl.data.processor;

import cn.db.crawl.data.entity.BingImage;
import cn.hutool.core.util.StrUtil;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

import java.time.LocalDate;

/**
 * WebMagic {@link PageProcessor} that extracts the background image URL from the
 * Bing home page (https://cn.bing.com/) and publishes it as a {@link BingImage}
 * under the result field {@code "bingImage"}.
 *
 * @author zooooooooy
 * @date 2022/12/1 - 16:25
 */
public class BingImageProcessor implements PageProcessor {

    /** Result-field key under which the extracted {@link BingImage} is stored. */
    private static final String FIELD_BING_IMAGE = "bingImage";

    /** XPath selecting the container element whose markup carries the background image. */
    private static final String IMAGE_CONTAINER_XPATH = "//div[@class='img_cont']";

    /**
     * Captures the argument of {@code background-image: url(...)}.
     * The quantifier is reluctant so the match stops at the first closing
     * parenthesis instead of swallowing any later {@code )} on the same line.
     */
    private static final String IMAGE_URL_REGEX = "background-image: url\\((.*?)\\)";

    /** Crawl configuration, created once and returned by {@link #getSite()}. */
    private final Site site = Site.me().setDomain("cn.bing.com");

    /**
     * Extracts the background image URL from the page and stores a {@link BingImage}
     * in the page's result fields.
     *
     * <p>If no URL can be extracted, the page is flagged with {@code setSkip(true)}
     * and no result field is stored.
     *
     * @param page the downloaded page; data source is https://cn.bing.com/
     */
    @Override
    public void process(Page page) {
        String imageUrl = page.getHtml().xpath(IMAGE_CONTAINER_XPATH)
                .regex(IMAGE_URL_REGEX)
                // Strip the HTML-escaped quotes that surround the URL in the markup.
                .replace("&quot;", "")
                .toString();

        if (StrUtil.isEmpty(imageUrl)) {
            // Nothing usable was found: skip the page rather than emit an image without a URL.
            page.setSkip(true);
            return;
        }

        BingImage bingImage = new BingImage();
        // The image is dated with the day the crawl runs (system default time zone).
        bingImage.setImageDate(LocalDate.now());
        bingImage.setImageUrl(imageUrl);

        page.putField(FIELD_BING_IMAGE, bingImage);
    }

    /**
     * Returns the crawl configuration for this processor.
     *
     * @return the {@link Site} whose domain is {@code cn.bing.com}
     */
    @Override
    public Site getSite() {
        return site;
    }

}
